#!/usr/bin/env jq
# compile-config-0.02-parse_inference_rules -- Parses inference rules in deepdive.inference.factors
##
include "constants";
include "util";

.deepdive_ as $deepdive

# Parse every inference rule under deepdive.inference.factors into the array
# deepdive_.inference.factors_.  Each element is the original factor object
# augmented with:
#   factorName, factorsTable, weightsTable -- names used by later compilation steps
#   weight_    -- parsed weight field:   {is_fixed, params, init_value}
#   function_  -- parsed function field: {name, variables, id, isCategorical}
# Any field that cannot be parsed aborts compilation with an error() naming the factor.
| .deepdive_.inference.factors_ = [
    $deepdive.inference.factors | to_entries[]
    | .key as $factorNameQualified
    # strip the "factor/" qualifier (if present) to get the bare factor name
    | ($factorNameQualified | ltrimstr("factor/")) as $factorName
    # keep the untouched factor object around for error messages
    | .value | . as $factorObject

    # some useful names for compilation
    | .factorName = $factorName
    | .factorsTable = "\(deepdivePrefixForFactorsTable)\($factorName)"
    | .weightsTable = "\(deepdivePrefixForWeightsTable)\($factorName)"

    # parse the weight field
    #   "?" or "?(p1, p2, ...)" -> weight to be learned, with the listed parameters
    #   anything else           -> must be a number, used as a fixed weight
    # any failure inside (e.g. tonumber on a non-numeric string) is reported
    # uniformly through the catch clause
    | .weight_ = try (.weight | tostring | trimWhitespace
        | if startswith("?") then
            # unknown weight, find parameters
            { is_fixed: false
            , params: (ltrimstr("?") | trimWhitespace
                | ltrimstr("(") | rtrimstr(")") | trimWhitespace
                | if length == 0 then [] else split("\\s*,\\s*"; "") end)
            , init_value: 0
            }
        else
            # fixed weight
            { is_fixed: true
            , params: []
            , init_value: tonumber
            }
        end
        ) catch error("deepdive.inference.factors.\($factorName).weight unrecognized: \(
            $factorObject.weight | @json)")

    # parse the function field, which has the shape: NAME ( VAR, VAR, ... )
    | .function_ = (.function | trimWhitespace
        | capture("^ (?<name>.+)
                \\s* \\(
                \\s* (?<variables>.+)
                \\s* \\)
                   $"; "x") // error("deepdive.inference.factors.\($factorName
                        ) has an unrecognized function: \(@json)")
        # The greedy name group also swallows any whitespace written before the
        # opening parenthesis, so trim it before normalizing the case; otherwise
        # "Imply (a.b)" would yield "imply " and never match a known function.
        | .name |= (trimWhitespace | ascii_downcase)
        # parse arguments to the function or predicate (variables)
        # each argument looks like: [!] name.[path.]*label [[]] [= N]
        | .variables |= [ trimWhitespace | splits("\\s*,\\s*")
            | capture("^ (?<isNegated> !      )?
                    \\s* (?<columnLabel>
                            (?<columnPrefix>
                                (?<name> [^.]+ )
                    \\s*        \\. (?: [^.]+ \\. )*
                            )
                    \\s*    (?<columnLabelName>
                                [^.]+?
                            )
                         )
                    \\s* (?<isArray>   \\[\\] )?
                    \\s* (?: = (?<equalsTo> \\d+))?
                       $"; "x") // error("deepdive.inference.factors.\($factorName
                            ).function has an unrecognized variable argument: \(@json)")
            # turn the optional captures into booleans / a number
            | .isNegated |= (length > 0)
            | .isArray   |= (length > 0)
            | .equalsTo  |= (if . then tonumber else null end)
            | .columnId   = "\(.columnPrefix)\(deepdiveVariableIdColumn)"
            # link this variable reference to its schema definition to simplify compilation
            # NOTE: the parentheses are required.  In jq, `=` binds tighter than `//`,
            # so without them the fallback would apply to the (always truthy) result
            # of the assignment and the error would never be raised.
            | .schema     = ($deepdive.schema.variables_byName[.name] //
                error("deepdive.inference.factors.\($factorName).function refers to an undefined variable: \(.name)"))
            ]
        # assign the ordinal index (zero-based position in the argument list) to each variable
        | .variables |= [ . as $vars | range($vars | length) | . as $i | $vars[$i] | .ordinal = $i ]
        # map function name (case insensitive) to the code used in the binary format for the inference engine
        # (parenthesized for the same precedence reason as .schema above, so that an
        # unknown name is rejected instead of silently producing a null id)
        | .id = (
            { imply       : 0
            , or          : 1
            , and         : 2
            , equal       : 3
            , istrue      : 4
            , linear      : 7
            , ratio       : 8
            , logical     : 9
            , imply3      : 11
            , andcategorical : 12
            }[.name] //
                error("deepdive.inference.factors.\($factorName
                    ) uses an unrecognized function: \(.name | @json)"))

        # finds out whether it is defined over categorical variables
        | .isCategorical = (.name | in(
            { andcategorical: 1
            }))

        # check required/permitted number of parameters
        | if .name == "istrue" and (.variables | length) != 1
        then error("deepdive.inference.factors.\($factorName).function: '\(
            .name)' must be over exactly one variable but found \(.variables | length)")
        else . end
        | if .name == "equal" and (.variables | length) != 2
        then error("deepdive.inference.factors.\($factorName).function: '\(
            .name)' must be over exactly two variables but found \(.variables | length)")
        else . end

        # TODO check if all .variables type are categorical for categorical function
        )

]

# create a map to make it easy to access a factor by its name
| .deepdive_.inference.factors_byName = (.deepdive_.inference.factors_ | map({key: .factorName, value: .}) | from_entries)
